library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(rvest)
##
## Attaching package: 'rvest'
##
## The following object is masked from 'package:readr':
##
## guess_encoding
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(geodata)
## Loading required package: terra
## terra 1.7.78
##
## Attaching package: 'terra'
##
## The following object is masked from 'package:tidyr':
##
## extract
library(raster)
## Loading required package: sp
##
## Attaching package: 'raster'
##
## The following object is masked from 'package:dplyr':
##
## select
library(lubridate)
library(terra)
library(fields)
## Loading required package: spam
## Spam version 2.11-0 (2024-10-03) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
##
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
##
## Loading required package: viridisLite
##
## Try help(fields) to get started.
##
## Attaching package: 'fields'
##
## The following object is masked from 'package:geodata':
##
## world
##
## The following object is masked from 'package:terra':
##
## describe
library(httr)
library(jsonlite)
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:purrr':
##
## flatten
# Scrap wildfire data from 2012 to 2017 from BC government website.
historical = read_html("https://www2.gov.bc.ca/gov/content/safety/wildfire-status/about-bcws/wildfire-statistics")
historical <- historical |>
html_table(fill = TRUE)
historical <- as_tibble(historical[[1]])
historical <- historical |>
arrange(Year)
head(historical)
## # A tibble: 6 × 8
## Year `Fire Number` `Fire Centre` Latitude Longitude Geographic
## <int> <chr> <chr> <chr> <chr> <chr>
## 1 2012 C10075 (2012) Cariboo 52 44.747 124 22.619 Bald Face Mountain
## 2 2012 C10118 (2012) Cariboo 52 41.849 123 00.607 Quesnel-Merston Creek
## 3 2012 C10247 (2012) Cariboo 53 03.159 122 58.020 11B Road Blackwater
## 4 2012 C10250 (2012) Cariboo 52 30.773 122 26.629 Margeurite
## 5 2012 C20005 (2012) Cariboo 52 07.676 121 54.904 150 Mile House-330 Val…
## 6 2012 C20009 (2012) Cariboo 52 06.000 121 59.000 Sugarcane IR
## # ℹ 2 more variables: `Discovery Date` <chr>, `Size (ha)` <dbl>
download_path <- "/Users/marlowe/Desktop/DATA-6200-Data-Manipulation-Visualization/Assignment 2"
provinces <- st_as_sf(geodata::gadm("CAN", level = 1, download = TRUE, path = download_path))
bc_boundary <- provinces[provinces$NAME_1 == "British Columbia", ]
plot(bc_boundary$geometry)
# Download temperature data and elevation data from geodata package
temperature_data <- worldclim_country(
country = "CAN",
var = "tmax",
res = "2.5",
lon = -126, lat = 53,
version = "2.1",
path = download_path)
elevation_data <- elevation_30s(country = "CAN", region = bc_boundary, path = download_path)
Since high temperature generally contribute more to dry environment
and potentially more wildfires, I chose to download the max temperature
data (see the argument var = "tmax".
# Crop and mask temperature data and elevation data
month_names <- month.abb # Abbreviations for months Jan, Feb, ..., Dec
names(temperature_data) <- month_names
temperature_cropped <- crop(temperature_data, extent(bc_boundary))
temperature_masked <- mask(temperature_cropped, bc_boundary)
elevation_cropped <- crop(elevation_data, extent(bc_boundary))
elevation_masked <- mask(elevation_cropped, bc_boundary)
# Plot layout
par(mfrow = c(3, 4),oma = c(0, 0, 6, 0))
for (i in 1:12) {
plot(temperature_masked[[i]], main = paste(month_names[i]))
}
mtext("Average Monthly Temperature", outer = TRUE, cex = 1.5, line = 2)
mtext("Temperature data across British Columbia for 2012-2017", outer = TRUE, cex = 1, line = 0)
plot(elevation_masked, main = "Elevation Data for British Columbia")
names(historical) <- c("year", "fire_number", "fire_centre", "latitude", "longitude", "geographic", "discovery_date", "size")
convert_to_double <- function(str) {
parts <- str_split(str, " ") |> unlist() # Split into degrees and minutes
degrees <- as.numeric(parts[1])
minutes <- as.numeric(parts[2])
float <- degrees + minutes / 60
return(float)
}
# Convert latitude and longitude column to doubles
historical <- historical |>
mutate(
latitude = sapply(latitude, convert_to_double),
longitude = sapply(longitude, convert_to_double),
longitude = -longitude)
historical <- historical |>
mutate(
month = str_extract(discovery_date, "\\b[A-Za-z]+\\b"), # Extract first word -> month
month = factor(month, levels = month.name, labels = month.abb)) # convert month to abbreviations
# Set up plot layout
par(mfrow = c(3, 4),oma = c(0, 0, 5, 0))
for (i in 1:12) {
month_name <- month.abb[i] # Abbreviation
# Filter fire data for the month in current loop
fire_data_month <- historical |>
filter(month == month_name)
if (nrow(fire_data_month) > 0) {
fire_data_month_sf <- st_as_sf(fire_data_month, coords = c("longitude", "latitude"), crs = 4326)
fire_data_month_sf <- st_transform(fire_data_month_sf, crs = st_crs(temperature_masked))
plot(temperature_masked[[i]], main = paste(month_name), alpha = 1)
plot(st_geometry(fire_data_month_sf), add = TRUE, col = "red", pch = 20, cex = 0.2)
} else { # If there are no fire data points for the month, only plot the temperature
plot(temperature_masked[[i]], main = paste(month_name), alpha = 1)
text(0, 0, labels = paste("No fires in", month_name), cex = 1.5, col = "black")
}
}
mtext("Wildfire Distribution over Temperature", outer = TRUE, cex = 1.5, line = 2)
As the graph shows, wildfires tends to occur more frequently when the temperature is higher, which is in summer. Here’s another graph that shows how the number of wildfires vary over months.
fire_counts_by_month <- historical |>
group_by(month) |>
summarize(count = n()) |>
complete(month = month.abb, fill = list(count = 0)) |>
mutate(month = factor(month, levels = month.abb))
# Plot bar chart of wildfire counts by month
par(mfrow = c(1, 1))
barplot(
fire_counts_by_month$count[order(fire_counts_by_month$month)],
names.arg = month.abb,
col = "orange",
xlab = "Month",
ylab = "Number of Wildfires",
main = "Wildfire Counts by Month",
ylim = c(0, max(fire_counts_by_month$count) * 1.05))
Wildfires occurs the most between April and August.
# Set up plot layout
par(mfrow = c(2, 3), oma = c(0, 0, 6, 0))
for (i in 4:9) {
month_name <- month.abb[i] # Abbreviation
# Filter fire data for the month in current loop
fire_data_month <- historical |>
filter(month == month_name)
if (nrow(fire_data_month) > 0) {
fire_data_month_sf <- st_as_sf(fire_data_month, coords = c("longitude", "latitude"), crs = 4326)
fire_data_month_sf <- st_transform(fire_data_month_sf, crEs = st_crs(temperature_masked))
plot(temperature_masked[[i]], main = paste(month_name), alpha = 1)
plot(st_geometry(fire_data_month_sf), add = TRUE, col = "red", pch = 20, cex = 0.3)
} else { # If there are no fire data points for the month, only plot the temperature
plot(temperature_masked[[i]], main = paste(month_name), alpha = 1)
text(0, 0, labels = paste("No fires in", month_name), cex = 1.5, col = "black")
}
}
mtext("Wildfire Distribution over Specific Months", outer = TRUE, cex = 1.5, line = 2)
The mean temperature across British Columbia has increased by 1.9°C between 1948 and 2016 (source), which is not noticeable on the plot. We assume that annual average temperature of Britith Columbia didn’t change at all between 2012 and 2017, and plot wildfire data points on each year.
Here I want to mention a limitation of this portion of data from geodata package. The temperature raster data has 12 layers, each representing a corresonding month. However, it doesn’t have an “year” attribute, which means annual average of each year can not be computed from this data set, the data of 12 months is probably observed among years to get an average temperature for each month at each location.
This limitation can be dealt with by accessing other functions in geodata to get data among years, however for some reason I got the error saying it exceeds my 16gb RAM, which is the main reason why I’m approximating annual temperature data without calculating it from the dataset.
# Set up a 2x3 grid of subplots (for 6 graphs)
par(mfrow = c(2, 3), mar = c(4, 4, 2, 1), oma = c(0, 0, 6, 0))
# Loop over each year (2012 to 2017)
for (i in 2012:2017) {
month_name <- month.abb # Month abbreviations
# Filter fire data for the current year
fire_data_year <- historical |>
filter(year == i)
avg_temp_layer <- temperature_masked[[7]]
plot(avg_temp_layer, main = paste(i), alpha = 1)
if (nrow(fire_data_year) > 0) {
fire_data_year_sf <- st_as_sf(fire_data_year, coords = c("longitude", "latitude"), crs = 4326)
fire_data_year_sf <- st_transform(fire_data_year_sf, crs = st_crs(temperature_masked))
plot(st_geometry(fire_data_year_sf), add = TRUE, col = "red", pch = 20, cex = 0.5)
} else { # if there is no wildfire
text(0, 0, labels = "No fires", cex = 1.5, col = "black")
}
}
mtext("Wildfire Distribution over Years", outer = TRUE, cex = 1.5, line = 2)
From the result, the conclusion can be drawn that there is a tendency for the wildfires to occur more at the south and/or east side of the province among these years.
month_to_layer <- setNames(1:12, month.abb)
# Add a column to `historical` for the corresponding layer index based on the month
historical <- historical |>
mutate(layer_index = month_to_layer[month])
# Extract temperature values for each wildfire based on location and month layer
historical <- historical |>
rowwise() |>
mutate(
temperature = extract(temperature_masked[[layer_index]], cbind(longitude, latitude))[[1]]
) |>
ungroup()
historical_temp <- historical |>
mutate(temp_rounded = round(temperature, digits = 0)) |> # Round to the nearest integer
filter(!is.na(temp_rounded)) # Remove rows where temp_rounded is NA
wildfire_counts <- historical_temp |>
group_by(temp_rounded) |>
summarize(wildfire_count = n())
ggplot(wildfire_counts, aes(x = temp_rounded, y = wildfire_count)) +
geom_bar(stat = "identity", fill = "orange", color = "black") +
labs(
title = "Wildfire Count by Temperature",
x = "Temperature (°C)",
y = "Wildfire Count"
) +
scale_x_continuous(
breaks = seq(
from = min(wildfire_counts$temp_rounded, na.rm = TRUE),
to = max(wildfire_counts$temp_rounded, na.rm = TRUE),
by = 1
)
) +
scale_y_continuous(
breaks = seq(
from = min(0),
to = max(105),
by = 10
)
) +
theme_minimal()
The graph suggests that generally, as monthly average temperature goes up, the occurrence of wildfire also become more frequently, this is especially obvious below 22°C. However, also notice that when average monthly temperature goes above 23°C, the occurrence of wildfire decreases. This unusual phenomenon can possibly be caused by:
historical_sf <- st_as_sf(historical, coords = c("longitude", "latitude"), crs = 4326)
# Convert historical data (sf object) to a SpatVector
historical_spat <- vect(historical_sf)
# Extract elevation values from the raster and add them to historical data
historical$elevation <- extract(elevation_masked, historical_spat)[, 2]
historical_df <- as.data.frame(historical)
# Round elevation and remove NAs
historical_elev <- historical_df |>
mutate(elevation_rounded = floor(elevation / 100) * 100) |> # Bin elevation in 50 meter intervals
filter(!is.na(elevation_rounded))
# Count the number of wildfires for each elevation level
wildfire_counts_elevation <- historical_elev |>
group_by(elevation_rounded) |>
summarize(wildfire_count = n())
# Plot the wildfire count against elevation
ggplot(wildfire_counts_elevation, aes(x = elevation_rounded, y = wildfire_count)) +
geom_bar(stat = "identity", fill = "steelblue", color = "black") +
labs(
title = "Wildfire Count by Elevation",
x = "Elevation Interval (m)",
y = "Wildfire Count"
) +
scale_x_continuous(
breaks = seq(
from = min(wildfire_counts_elevation$elevation_rounded, na.rm = TRUE),
to = max(wildfire_counts_elevation$elevation_rounded, na.rm = TRUE),
by = 200
)) +
scale_y_continuous(
breaks = seq(
from = min(0),
to = max(150),
by = 10
))+
theme_minimal()
The graph shows that among all possible elevation intervals, wildfire tend to occur the most when the elevation is between 400 meters and 1000 meters, considering that average elevation of the while British Columbia province is actually 708 meters (source), this is actually a reasonable result.
For elevation below 400 meters, wildfire tends to occur less than between 400 and 1000 meters. That could be resulted from higher humidity in these areas, or some other natural attributes of this area that limits the condition for wildfire.
For elevation above 1000 meters, wildfire tends to occur less than between 400 and 1000 meters. That could be resulted from lower temperature from these areas, higher average wind speed or some other factors.
But generally speaking, assume that the impact of humidity and temperature/wind speed is significant to the occurrence of wildfire, the conclusion is still obvious that wildfire tends to happen in lower elevation areas.
# Define the ArcGIS REST API URL with query parameters
url <- "https://services6.arcgis.com/ubm4tcTYICKBpist/arcgis/rest/services/BCWS_ActiveFires_PublicView/FeatureServer/0/query"
# Set query parameters for the API call
params <- list(
where = "1=1", # Selects all records
outFields = "*", # Requests all fields
f = "geojson" # Requests the response in GeoJSON format for spatial data
)
response <- GET(url, query = params)
if (http_status(response)$category == "Success") {
# Parse the GeoJSON response content
data_geojson <- content(response, as = "text")
current_fires <- st_read(data_geojson, quiet = TRUE)
} else {
stop("Failed to retrieve data from the API.")
}
current_fires <- current_fires %>%
mutate(IGNITION_DATE = as.POSIXct(IGNITION_DATE / 1000, origin = "1970-01-01", tz = "UTC"))
plot(elevation_masked, main = "Current Wildfire over Elevation")
plot(st_geometry(current_fires$geometry), add = TRUE, col = "red", pch = 20, cex = 0.5)
From this graph, compared to the graph above that shows how wildfire
distribution vary over years between 2012-2017, a summary can be made
that there is a tendency for wildfire to occur more and more in the
south east area, which is also the area with relatively higher
elevation. Let’s check this fact even more.
current_sf <- st_as_sf(current_fires, coords = c("longitude", "latitude"), crs = 4326)
# Convert historical data (sf object) to a SpatVector
current_spat <- vect(current_sf)
# Extract elevation values from the raster and add them to historical data
current_fires$elevation <- extract(elevation_masked, current_spat)[, 2]
current_df <- as.data.frame(current_fires)
current_elev <- current_df |>
mutate(elevation_rounded = floor(elevation / 100) * 100) |> # Bin elevation in 50 meter intervals
filter(!is.na(elevation_rounded))
wildfire_counts_elevation <- current_elev |>
group_by(elevation_rounded) |>
summarize(wildfire_count = n())
ggplot(wildfire_counts_elevation, aes(x = elevation_rounded, y = wildfire_count)) +
geom_bar(stat = "identity", fill = "steelblue", color = "black") +
labs(
title = "Wildfire Count by Elevation",
x = "Elevation Interval (m)",
y = "Wildfire Count"
) +
scale_x_continuous(
breaks = seq(
from = min(wildfire_counts_elevation$elevation_rounded, na.rm = TRUE),
to = max(wildfire_counts_elevation$elevation_rounded, na.rm = TRUE),
by = 200
)) +
scale_y_continuous(
breaks = seq(
from = min(0),
to = max(150),
by = 10
))+
theme_minimal()
from this graph a different pattern can be seen compared to the graph plotted from historical data. Wildfires are moving from lower elevation area to higher elevation area. In 2024, wildfire occur the most in areas with elevation between 600 meters and 1400 meters, which is significantly higher than 400 to 1000 meters as we saw before.
Now let’s check the difference between historical wildfire and current wildfire in terms of temperature.
current_fires <- st_as_sf(current_fires)
current_fires <- current_fires |>
mutate(
coords = st_coordinates(geometry), # Extract coordinates from geometry
longitude = coords[, 1], # Assign longitude
latitude = coords[, 2] # Assign latitude
)
current_fires <- current_fires |>
mutate(month = month(ymd_hms(IGNITION_DATE)))
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `month = month(ymd_hms(IGNITION_DATE))`.
## Caused by warning:
## ! 15 failed to parse.
month_to_layer <- setNames(1:12, month.abb)
current_fires <- current_fires |>
mutate(layer_index = month_to_layer[month])
current_fires <- current_fires |>
filter(!is.na(month))
current_fires <- current_fires |>
rowwise() |>
mutate(
temperature = extract(temperature_masked[[layer_index]], cbind(longitude, latitude))[[1]]
) |>
ungroup()
current_temp <- current_fires |>
mutate(temp_rounded = round(temperature, digits = 0)) |> # Round to the nearest integer
filter(!is.na(temp_rounded)) # Remove rows where temp_rounded is NA
wildfire_counts <- current_temp |>
group_by(temp_rounded) |>
summarize(wildfire_count = n())
ggplot(wildfire_counts, aes(x = temp_rounded, y = wildfire_count)) +
geom_bar(stat = "identity", fill = "orange", color = "black") +
labs(
title = "Wildfire Count by Temperature (2024)",
x = "Temperature (°C)",
y = "Wildfire Count"
) +
scale_x_continuous(
breaks = seq(
from = min(wildfire_counts$temp_rounded, na.rm = TRUE),
to = max(wildfire_counts$temp_rounded, na.rm = TRUE),
by = 1
)
) +
scale_y_continuous(
breaks = seq(
from = min(0),
to = max(150),
by = 20
)
) +
theme_minimal()
Compare between the two graphs, it is obvious that wildfires now happen more between 16°C and 24°C, this is different from historical wildfire data. However, the fact that there were wildfire that occurred at super low temperature or when temperature is higher than 25, may still be caused by other factors like human activity or lack of data points.
Last thing I want to mention about the data itself, is about the Y-axis of the two graphs here. The number of data points sum up to about 1000, which is not possible in my opinion. There can’t be 1000 wildfire in British Columbia in 2024. So all the analysis of the last section was done with the assumption that the data set contains different observations of each wildfire.
However, this can be considered as advantange too, because this will give a very detailed pattern of wildfire situation in year 2024, and I was comparing the general pattern to the historical data, not the numbers, so the issue of current data set doesn’t affect my conclusion.